{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# pandas的数据结构介绍\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from pandas import DataFrame\n",
    "from pandas import Series"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Series"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0    4\n",
      "1    7\n",
      "2   -5\n",
      "3    3\n",
      "dtype: int64\n",
      "[ 4  7 -5  3]\n",
      "RangeIndex(start=0, stop=4, step=1)\n"
     ]
    }
   ],
   "source": [
    "obj = Series([4, 7, -5, 3])\n",
    "print(obj)\n",
    "print(obj.values)\n",
    "print(obj.index)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "d    4\n",
      "b    7\n",
      "a   -5\n",
      "c    3\n",
      "dtype: int64\n",
      "-5\n"
     ]
    }
   ],
   "source": [
    "obj2 = Series([4, 7, -5, 3], index=['d', 'b', 'a', 'c'])\n",
    "print(obj2)\n",
    "print(obj2['a'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "c    3\n",
      "a   -5\n",
      "d    6\n",
      "dtype: int64\n"
     ]
    }
   ],
   "source": [
    "obj2['d'] = 6\n",
    "print(obj2[['c', 'a', 'd']])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "d    6\n",
       "b    7\n",
       "c    3\n",
       "dtype: int64"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "obj2[obj2 > 0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "d    12\n",
       "b    14\n",
       "a   -10\n",
       "c     6\n",
       "dtype: int64"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "obj2 * 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "d     403.428793\n",
       "b    1096.633158\n",
       "a       0.006738\n",
       "c      20.085537\n",
       "dtype: float64"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.exp(obj2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "True\n",
      "False\n"
     ]
    }
   ],
   "source": [
    "print('b' in obj2)\n",
    "print('e' in obj2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Ohio      35000\n",
       "Oregon    16000\n",
       "Texas     71000\n",
       "Utah       5000\n",
       "dtype: int64"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Dict -> Series\n",
    "sdata = {'Ohio': 35000, 'Texas': 71000, 'Oregon': 16000, 'Utah': 5000}\n",
    "obj3 = Series(sdata)\n",
    "obj3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "California        NaN\n",
       "Ohio          35000.0\n",
       "Oregon        16000.0\n",
       "Texas         71000.0\n",
       "dtype: float64"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "states = ['California', 'Ohio', 'Oregon', 'Texas']\n",
    "obj4 = Series(sdata, index=states) # 自动与dict的key匹配\n",
    "obj4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "California     True\n",
      "Ohio          False\n",
      "Oregon        False\n",
      "Texas         False\n",
      "dtype: bool\n",
      "California    False\n",
      "Ohio           True\n",
      "Oregon         True\n",
      "Texas          True\n",
      "dtype: bool\n"
     ]
    }
   ],
   "source": [
    "print(pd.isnull(obj4))\n",
    "print(pd.notnull(obj4))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "California         NaN\n",
      "Ohio           70000.0\n",
      "Oregon         32000.0\n",
      "Texas         142000.0\n",
      "Utah               NaN\n",
      "dtype: float64\n"
     ]
    }
   ],
   "source": [
    "print(obj3 + obj4) # 数据自动对齐"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "州\n",
       "California        NaN\n",
       "Ohio          35000.0\n",
       "Oregon        16000.0\n",
       "Texas         71000.0\n",
       "Name: 人口, dtype: float64"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "obj4.name = '人口'\n",
    "obj4.index.name = '州'\n",
    "obj4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Bob      4\n",
       "Steve    7\n",
       "Jeff    -5\n",
       "Ryan     3\n",
       "dtype: int64"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "obj = Series([4, 7, -5, 3])\n",
    "obj.index = ['Bob', 'Steve', 'Jeff', 'Ryan'] # 更新索引\n",
    "obj"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# DataFrame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>pop</th>\n",
       "      <th>state</th>\n",
       "      <th>year</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.5</td>\n",
       "      <td>Ohio</td>\n",
       "      <td>2000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.7</td>\n",
       "      <td>Ohio</td>\n",
       "      <td>2001</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3.6</td>\n",
       "      <td>Ohio</td>\n",
       "      <td>2002</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2.4</td>\n",
       "      <td>Nevada</td>\n",
       "      <td>2001</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2.9</td>\n",
       "      <td>Nevada</td>\n",
       "      <td>2002</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   pop   state  year\n",
       "0  1.5    Ohio  2000\n",
       "1  1.7    Ohio  2001\n",
       "2  3.6    Ohio  2002\n",
       "3  2.4  Nevada  2001\n",
       "4  2.9  Nevada  2002"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data = {'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada'],\n",
    "        'year': [2000, 2001, 2002, 2001, 2002],\n",
    "        'pop': [1.5, 1.7, 3.6, 2.4, 2.9]}\n",
    "frame = DataFrame(data) # key对应frame的列名\n",
    "frame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>year</th>\n",
       "      <th>state</th>\n",
       "      <th>pop</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2000</td>\n",
       "      <td>Ohio</td>\n",
       "      <td>1.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2001</td>\n",
       "      <td>Ohio</td>\n",
       "      <td>1.7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2002</td>\n",
       "      <td>Ohio</td>\n",
       "      <td>3.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2001</td>\n",
       "      <td>Nevada</td>\n",
       "      <td>2.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2002</td>\n",
       "      <td>Nevada</td>\n",
       "      <td>2.9</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   year   state  pop\n",
       "0  2000    Ohio  1.5\n",
       "1  2001    Ohio  1.7\n",
       "2  2002    Ohio  3.6\n",
       "3  2001  Nevada  2.4\n",
       "4  2002  Nevada  2.9"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "frame = DataFrame(data, columns=['year', 'state', 'pop']) # 指定列顺序\n",
    "frame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>year</th>\n",
       "      <th>state</th>\n",
       "      <th>pop</th>\n",
       "      <th>debt</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>one</th>\n",
       "      <td>2000</td>\n",
       "      <td>Ohio</td>\n",
       "      <td>1.5</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>two</th>\n",
       "      <td>2001</td>\n",
       "      <td>Ohio</td>\n",
       "      <td>1.7</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>three</th>\n",
       "      <td>2002</td>\n",
       "      <td>Ohio</td>\n",
       "      <td>3.6</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>four</th>\n",
       "      <td>2001</td>\n",
       "      <td>Nevada</td>\n",
       "      <td>2.4</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>five</th>\n",
       "      <td>2002</td>\n",
       "      <td>Nevada</td>\n",
       "      <td>2.9</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       year   state  pop debt\n",
       "one    2000    Ohio  1.5  NaN\n",
       "two    2001    Ohio  1.7  NaN\n",
       "three  2002    Ohio  3.6  NaN\n",
       "four   2001  Nevada  2.4  NaN\n",
       "five   2002  Nevada  2.9  NaN"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "frame2 = DataFrame(data,\n",
    "                   columns=['year', 'state', 'pop', 'debt'],\n",
    "                   index=['one', 'two', 'three', 'four', 'five']) # 分别指定行列名字，缺失值自动填充，比如debt列。\n",
    "frame2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "one        Ohio\n",
      "two        Ohio\n",
      "three      Ohio\n",
      "four     Nevada\n",
      "five     Nevada\n",
      "Name: state, dtype: object\n",
      "one      2000\n",
      "two      2001\n",
      "three    2002\n",
      "four     2001\n",
      "five     2002\n",
      "Name: year, dtype: int64\n",
      "<class 'pandas.core.series.Series'>\n"
     ]
    }
   ],
   "source": [
    "print(frame2['state']) # 通过索引返回指定列，返回类型为Series\n",
    "print(frame2.year)\n",
    "print(type(frame.state))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "year     2002\n",
      "state    Ohio\n",
      "pop       3.6\n",
      "debt      NaN\n",
      "Name: three, dtype: object\n",
      "year     2000\n",
      "state    Ohio\n",
      "pop       1.5\n",
      "debt      NaN\n",
      "Name: one, dtype: object\n"
     ]
    }
   ],
   "source": [
    "print(frame2.loc['three']) # 使用loc访问行，iloc针对默认的数字索引\n",
    "print(frame2.iloc[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>year</th>\n",
       "      <th>state</th>\n",
       "      <th>pop</th>\n",
       "      <th>debt</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>one</th>\n",
       "      <td>2000</td>\n",
       "      <td>Ohio</td>\n",
       "      <td>1.5</td>\n",
       "      <td>16.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>two</th>\n",
       "      <td>2001</td>\n",
       "      <td>Ohio</td>\n",
       "      <td>1.7</td>\n",
       "      <td>16.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>three</th>\n",
       "      <td>2002</td>\n",
       "      <td>Ohio</td>\n",
       "      <td>3.6</td>\n",
       "      <td>16.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>four</th>\n",
       "      <td>2001</td>\n",
       "      <td>Nevada</td>\n",
       "      <td>2.4</td>\n",
       "      <td>16.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>five</th>\n",
       "      <td>2002</td>\n",
       "      <td>Nevada</td>\n",
       "      <td>2.9</td>\n",
       "      <td>16.5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       year   state  pop  debt\n",
       "one    2000    Ohio  1.5  16.5\n",
       "two    2001    Ohio  1.7  16.5\n",
       "three  2002    Ohio  3.6  16.5\n",
       "four   2001  Nevada  2.4  16.5\n",
       "five   2002  Nevada  2.9  16.5"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "frame2['debt'] = 16.5 # 修改整列值\n",
    "frame2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>year</th>\n",
       "      <th>state</th>\n",
       "      <th>pop</th>\n",
       "      <th>debt</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>one</th>\n",
       "      <td>2000</td>\n",
       "      <td>Ohio</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>two</th>\n",
       "      <td>2001</td>\n",
       "      <td>Ohio</td>\n",
       "      <td>1.7</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>three</th>\n",
       "      <td>2002</td>\n",
       "      <td>Ohio</td>\n",
       "      <td>3.6</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>four</th>\n",
       "      <td>2001</td>\n",
       "      <td>Nevada</td>\n",
       "      <td>2.4</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>five</th>\n",
       "      <td>2002</td>\n",
       "      <td>Nevada</td>\n",
       "      <td>2.9</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       year   state  pop  debt\n",
       "one    2000    Ohio  1.5   0.0\n",
       "two    2001    Ohio  1.7   1.0\n",
       "three  2002    Ohio  3.6   2.0\n",
       "four   2001  Nevada  2.4   3.0\n",
       "five   2002  Nevada  2.9   4.0"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "frame2['debt'] = np.arange(5.)\n",
    "frame2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>year</th>\n",
       "      <th>state</th>\n",
       "      <th>pop</th>\n",
       "      <th>debt</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>one</th>\n",
       "      <td>2000</td>\n",
       "      <td>Ohio</td>\n",
       "      <td>1.5</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>two</th>\n",
       "      <td>2001</td>\n",
       "      <td>Ohio</td>\n",
       "      <td>1.7</td>\n",
       "      <td>-1.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>three</th>\n",
       "      <td>2002</td>\n",
       "      <td>Ohio</td>\n",
       "      <td>3.6</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>four</th>\n",
       "      <td>2001</td>\n",
       "      <td>Nevada</td>\n",
       "      <td>2.4</td>\n",
       "      <td>-1.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>five</th>\n",
       "      <td>2002</td>\n",
       "      <td>Nevada</td>\n",
       "      <td>2.9</td>\n",
       "      <td>-1.7</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       year   state  pop  debt\n",
       "one    2000    Ohio  1.5   NaN\n",
       "two    2001    Ohio  1.7  -1.2\n",
       "three  2002    Ohio  3.6   NaN\n",
       "four   2001  Nevada  2.4  -1.5\n",
       "five   2002  Nevada  2.9  -1.7"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "val = Series([-1.2, -1.5, -1.7], index=['two', 'four', 'five'])\n",
    "frame2['debt'] = val # 索引不匹配的话自动补NaN\n",
    "frame2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>year</th>\n",
       "      <th>state</th>\n",
       "      <th>pop</th>\n",
       "      <th>debt</th>\n",
       "      <th>eastern</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>one</th>\n",
       "      <td>2000</td>\n",
       "      <td>Ohio</td>\n",
       "      <td>1.5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>two</th>\n",
       "      <td>2001</td>\n",
       "      <td>Ohio</td>\n",
       "      <td>1.7</td>\n",
       "      <td>-1.2</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>three</th>\n",
       "      <td>2002</td>\n",
       "      <td>Ohio</td>\n",
       "      <td>3.6</td>\n",
       "      <td>NaN</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>four</th>\n",
       "      <td>2001</td>\n",
       "      <td>Nevada</td>\n",
       "      <td>2.4</td>\n",
       "      <td>-1.5</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>five</th>\n",
       "      <td>2002</td>\n",
       "      <td>Nevada</td>\n",
       "      <td>2.9</td>\n",
       "      <td>-1.7</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       year   state  pop  debt  eastern\n",
       "one    2000    Ohio  1.5   NaN     True\n",
       "two    2001    Ohio  1.7  -1.2     True\n",
       "three  2002    Ohio  3.6   NaN     True\n",
       "four   2001  Nevada  2.4  -1.5    False\n",
       "five   2002  Nevada  2.9  -1.7    False"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "frame2['eastern'] = frame2.state == 'Ohio' # 根据其它列的条件赋布尔值\n",
    "frame2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['year', 'state', 'pop', 'debt'], dtype='object')"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "del frame2['eastern'] # 删除指定列\n",
    "frame2.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Nevada</th>\n",
       "      <th>Ohio</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2000</th>\n",
       "      <td>NaN</td>\n",
       "      <td>1.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2001</th>\n",
       "      <td>2.4</td>\n",
       "      <td>1.7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2002</th>\n",
       "      <td>2.9</td>\n",
       "      <td>3.6</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      Nevada  Ohio\n",
       "2000     NaN   1.5\n",
       "2001     2.4   1.7\n",
       "2002     2.9   3.6"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pop = {'Nevada': {2001: 2.4, 2002: 2.9},\n",
    "       'Ohio': {2000: 1.5, 2001: 1.7, 2002: 3.6}}\n",
    "frame3 = DataFrame(pop) # 通过嵌套字典指定列和行索引\n",
    "frame3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>2000</th>\n",
       "      <th>2001</th>\n",
       "      <th>2002</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Nevada</th>\n",
       "      <td>NaN</td>\n",
       "      <td>2.4</td>\n",
       "      <td>2.9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Ohio</th>\n",
       "      <td>1.5</td>\n",
       "      <td>1.7</td>\n",
       "      <td>3.6</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        2000  2001  2002\n",
       "Nevada   NaN   2.4   2.9\n",
       "Ohio     1.5   1.7   3.6"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "frame3.T # 转置"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Nevada</th>\n",
       "      <th>Ohio</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2001</th>\n",
       "      <td>2.4</td>\n",
       "      <td>1.7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2002</th>\n",
       "      <td>2.9</td>\n",
       "      <td>3.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2003</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      Nevada  Ohio\n",
       "2001     2.4   1.7\n",
       "2002     2.9   3.6\n",
       "2003     NaN   NaN"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "DataFrame(pop, index=[2001, 2002, 2003]) # 索引2003匹配不到，自动填充NaN"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Nevada</th>\n",
       "      <th>Ohio</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2000</th>\n",
       "      <td>NaN</td>\n",
       "      <td>1.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2001</th>\n",
       "      <td>2.4</td>\n",
       "      <td>1.7</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      Nevada  Ohio\n",
       "2000     NaN   1.5\n",
       "2001     2.4   1.7"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pdata = {'Ohio': frame3['Ohio'][:-1],\n",
    "         'Nevada': frame3['Nevada'][:2]} # 使用Series替代普通数组\n",
    "DataFrame(pdata)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>state</th>\n",
       "      <th>Nevada</th>\n",
       "      <th>Ohio</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>year</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2000</th>\n",
       "      <td>NaN</td>\n",
       "      <td>1.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2001</th>\n",
       "      <td>2.4</td>\n",
       "      <td>1.7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2002</th>\n",
       "      <td>2.9</td>\n",
       "      <td>3.6</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "state  Nevada  Ohio\n",
       "year               \n",
       "2000      NaN   1.5\n",
       "2001      2.4   1.7\n",
       "2002      2.9   3.6"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "frame3.index.name = 'year' # 设置索引和列的名字\n",
    "frame3.columns.name = 'state'\n",
    "frame3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[ nan  1.5]\n",
      " [ 2.4  1.7]\n",
      " [ 2.9  3.6]]\n",
      "<class 'numpy.ndarray'>\n"
     ]
    }
   ],
   "source": [
    "print(frame3.values) # 以numpy数组形式返回values\n",
    "print(type(frame3.values))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# 索引对象"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0    1.5\n",
      "1   -2.5\n",
      "2    0.0\n",
      "dtype: float64\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "index = pd.Index(np.arange(3))\n",
    "obj2 = Series([1.5, -2.5, 0], index=index)\n",
    "print(obj2)\n",
    "obj2.index is index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "True\n",
      "False\n"
     ]
    }
   ],
   "source": [
    "print('Ohio' in frame3.columns)\n",
    "print(2003 in frame3.index)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Index的方法和属性\n",
    "# append：      连接另一个Index对象，产生一个新的Index。\n",
    "# diff：        计算差集，并得到一个Index。\n",
    "# intersection：计算交集\n",
    "# union：       计算并集\n",
    "# isin：        计算一个指示各值是否都包含在参数集合中的布尔型数组\n",
    "# delete：      删除索引i处的元素，并得到新的Index。\n",
    "# drop：        删除传入的值，并得到新的Index。\n",
    "# insert：      将元素插入到索引i处，并得到新的Index。\n",
    "# is_monotonic：如果单调增长，返回True。\n",
    "# is_unique：   当Index没有重复值时，返回True。\n",
    "# unique：      计算Index中唯一值得数组"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
